require(quanteda)
require(LSX)
require(gplots)

# Load the temporality dictionary and the tokenised training corpus.
dict <- dictionary(file = "temporality.yml")
toks <- readRDS("data/tokens_en.rds")

# Keep only tokens of at least two characters, then join multi-word matches of
# the English dictionary entries into single tokens separated by a space.
toks <- tokens_select(toks, min_nchar = 2)
toks <- tokens_compound(toks, dict$en, concatenator = " ")

# Document-feature matrix restricted to features occurring at least twice.
dfmt <- dfm(toks)
dfmt <- dfm_trim(dfmt, min_termfreq = 2)

# Build a tense-marked feature matrix from `x`.
#
# Selects the features matched by the "main" key of `entry`, multiplies each
# document's counts by 1 if the document has at least one feature matched by
# the "aux" key (0 otherwise), and appends `suffix` to every feature name.
mark_tense <- function(x, entry, suffix) {
    main <- dfm_select(x, entry["main"])
    aux_total <- rowSums(dfm_select(x, entry["aux"]))
    main <- main * as.integer(aux_total > 0)
    colnames(main) <- paste0(colnames(main), suffix)
    main
}

dfmt_future <- mark_tense(dfmt, dict$en$future, "/future")
dfmt_perfect <- mark_tense(dfmt, dict$en$perfect, "/past")

# Drop every feature matched by the English dictionary from the base matrix,
# then append the suffixed future and perfect columns alongside what remains.
dfmt2 <- dfm_remove(dfmt, pattern = c(dict$en))
dfmt3 <- cbind(dfmt2, dfmt_future, dfmt_perfect)

# Seed patterns: features ending in "/future" are given 1 and those ending in
# "/past" are given -1.
seed <- c("*/future" = 1, "*/past" = -1)
lss <- textmodel_lss(
    x = dfmt3,
    seeds = seed,
    k = 300,
    slice = 100,
    cache = TRUE
)

# Score the held-out documents with the fitted model.
# NOTE(review): unlike the training tokens, these are not passed through
# tokens_select()/tokens_compound() here - confirm the saved test tokens are
# already in the intended form.
toks_test <- readRDS("data/tokens_test_en.rds")
dfmt_test <- dfm(toks_test)

# Start from the document variables and attach the predicted score and a
# binned version of it (cut points at every integer from -3 to 3, with open
# ends on both sides).
dat <- docvars(dfmt_test)
dat$lss <- predict(lss, newdata = dfmt_test, min_n = 10)
dat$lss_bin <- cut(dat$lss, breaks = c(-Inf, -3:3, Inf))

# Persist the fitted model and the scored document variables.
saveRDS(object = lss, file = "lss_en.rds")
saveRDS(object = dat, file = "data_lss_en.rds")
